lab 13

Author

Madeleine

NEON MAG DATA REPORT

library(tidyverse)
library(lubridate)
library(DT)
library(viridis)
library(janitor)
library(plotly)
library(respirometry)
soilMAG_data <- read.csv("NEON_soilMAGs_soilChem.csv")

head(soilMAG_data)
  X         genomicsSampleID decimalLatitude decimalLongitude elevation
1 1 BONA_001-O-20230710-COMP        65.17445        -147.4782     374.1
2 2 BONA_001-O-20230710-COMP        65.17445        -147.4782     374.1
3 3 BONA_001-O-20230710-COMP        65.17445        -147.4782     374.1
4 4 BONA_001-O-20230710-COMP        65.17445        -147.4782     374.1
5 5 BONA_001-O-20230710-COMP        65.17445        -147.4782     374.1
6 6 BONA_001-O-20230710-COMP        65.17445        -147.4782     374.1
  soilTemp soilMoisture soilInWaterpH siteID        bin_oid         bin_id
1      8.1        1.546      3.993976   BONA  3300078357_s0  3300078357_s0
2      8.1        1.546      3.993976   BONA 3300078357_s10 3300078357_s10
3      8.1        1.546      3.993976   BONA 3300078357_s12 3300078357_s12
4      8.1        1.546      3.993976   BONA 3300078357_s13 3300078357_s13
5      8.1        1.546      3.993976   BONA 3300078357_s14 3300078357_s14
6      8.1        1.546      3.993976   BONA 3300078357_s19 3300078357_s19
                                                                  site
1 Caribou-Poker Creeks Research Watershed NEON Field Site, Alaska, USA
2 Caribou-Poker Creeks Research Watershed NEON Field Site, Alaska, USA
3 Caribou-Poker Creeks Research Watershed NEON Field Site, Alaska, USA
4 Caribou-Poker Creeks Research Watershed NEON Field Site, Alaska, USA
5 Caribou-Poker Creeks Research Watershed NEON Field Site, Alaska, USA
6 Caribou-Poker Creeks Research Watershed NEON Field Site, Alaska, USA
          sample_name subplot layer quadrant     date img_genome_id bin_quality
1 BONA_070-O-20230712      70     O       NA 20230712    3300078357          MQ
2 BONA_070-O-20230712      70     O       NA 20230712    3300078357          MQ
3 BONA_070-O-20230712      70     O       NA 20230712    3300078357          MQ
4 BONA_070-O-20230712      70     O       NA 20230712    3300078357          MQ
5 BONA_070-O-20230712      70     O       NA 20230712    3300078357          MQ
6 BONA_070-O-20230712      70     O       NA 20230712    3300078357          MQ
                                                                                                            bin_lineage
1                                                                                             Bacteria; Acidobacteriota
2                                              Bacteria; Acidobacteriota; Terriglobia; Terriglobales; Acidobacteriaceae
3                                                                                                              Bacteria
4       Bacteria; Pseudomonadota; Alphaproteobacteria; Rhodospirillales; Acetobacteraceae; Acidisoma; Acidisoma sp. L85
5                                                                                Bacteria; Acidobacteriota; Terriglobia
6 Bacteria; Pseudomonadota; Gammaproteobacteria; Lysobacterales; Rhodanobacteraceae; Luteibacter; Luteibacter sp. OK325
                                                                                               gtdb_taxonomy_lineage
1                                        Bacteria; Acidobacteriota; Terriglobia; Acidoferrales; UBA7541; Acidoferrum
2                                Bacteria; Acidobacteriota; Terriglobia; Terriglobales; Acidobacteriaceae; PALSA-350
3                                           Bacteria; Myxococcota; Myxococcia; Myxococcales; Myxococcaceae; JADGRA01
4 Bacteria; Pseudomonadota; Alphaproteobacteria; Acetobacterales; Acetobacteraceae; Acidisoma; Acidisoma sp009765865
5                                          Bacteria; Acidobacteriota; Terriglobia; Acidoferrales; UBA7541; Palsa-295
6                    Bacteria; Pseudomonadota; Gammaproteobacteria; Xanthomonadales; Rhodanobacteraceae; Luteibacter
    domain          phylum               class           order
1 Bacteria Acidobacteriota         Terriglobia   Acidoferrales
2 Bacteria Acidobacteriota         Terriglobia   Terriglobales
3 Bacteria     Myxococcota          Myxococcia    Myxococcales
4 Bacteria  Pseudomonadota Alphaproteobacteria Acetobacterales
5 Bacteria Acidobacteriota         Terriglobia   Acidoferrales
6 Bacteria  Pseudomonadota Gammaproteobacteria Xanthomonadales
              family       genus
1            UBA7541 Acidoferrum
2  Acidobacteriaceae   PALSA-350
3      Myxococcaceae    JADGRA01
4   Acetobacteraceae   Acidisoma
5            UBA7541   Palsa-295
6 Rhodanobacteraceae Luteibacter
                                                                   bin_methods
1 SemiBin2:v2.1.0, CheckM2:v1.0.2, GTDB-Tk:v2.4.0, GTDB-Tk-database:release220
2 SemiBin2:v2.1.0, CheckM2:v1.0.2, GTDB-Tk:v2.4.0, GTDB-Tk-database:release220
3 SemiBin2:v2.1.0, CheckM2:v1.0.2, GTDB-Tk:v2.4.0, GTDB-Tk-database:release220
4 SemiBin2:v2.1.0, CheckM2:v1.0.2, GTDB-Tk:v2.4.0, GTDB-Tk-database:release220
5 SemiBin2:v2.1.0, CheckM2:v1.0.2, GTDB-Tk:v2.4.0, GTDB-Tk-database:release220
6 SemiBin2:v2.1.0, CheckM2:v1.0.2, GTDB-Tk:v2.4.0, GTDB-Tk-database:release220
    created_by date_added bin_completeness bin_contamination average_coverage
1 IMG_PIPELINE 2025-01-24            95.64              4.79               NA
2 IMG_PIPELINE 2025-01-24            62.45              2.88               NA
3 IMG_PIPELINE 2025-01-24            90.00              0.04               NA
4 IMG_PIPELINE 2025-01-24            87.72              1.90               NA
5 IMG_PIPELINE 2025-01-24            79.33              1.17               NA
6 IMG_PIPELINE 2025-01-24            80.17              2.58               NA
  total_number_of_bases x5s_r_rna x16s_r_rna x23s_r_rna t_rna_genes gene_count
1               4541925         0          2          1          43       4073
2               4850766         0          0          1          31       4301
3               4108080         1          0          0          49       4046
4               3014437         1          0          1          27       3091
5               3187874         0          0          0          24       3086
6               3917029         0          0          0          34       3943
  scaffold_count gold_study_id                  community type sample_unknown
1            163     Gs0166454 Soil microbial communities Soil             NA
2            269     Gs0166454 Soil microbial communities Soil             NA
3            394     Gs0166454 Soil microbial communities Soil             NA
4            281     Gs0166454 Soil microbial communities Soil             NA
5            247     Gs0166454 Soil microbial communities Soil             NA
6            466     Gs0166454 Soil microbial communities Soil             NA

Environmental Correlations

show the relationship between environmental conditions and samples

Elevation

df <- read.csv("NEON_soilMAGs_soilChem.csv")  
subset_idelev <- df[, c("img_genome_id", "elevation")]

head(subset_idelev)
  img_genome_id elevation
1    3300078357     374.1
2    3300078357     374.1
3    3300078357     374.1
4    3300078357     374.1
5    3300078357     374.1
6    3300078357     374.1
write.csv(subset_idelev, "genome_elevation.csv", row.names = FALSE)
library(ggplot2)

ggplot(subset_idelev, aes(x = img_genome_id, y = elevation)) +
  geom_bar(stat = "identity", fill = "darkgreen") +
  labs(x = "Genome ID", y = "Elevation") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(subset_idelev, aes(x = img_genome_id, y = elevation)) +
  geom_point(color = "darkgreen", size = 3) +
  labs(x = "Genome ID", y = "Elevation") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Soil Moisture

df <- read.csv("NEON_soilMAGs_soilChem.csv")  
subset_idmois <- df[, c("img_genome_id", "soilMoisture")]

head(subset_idmois)
  img_genome_id soilMoisture
1    3300078357        1.546
2    3300078357        1.546
3    3300078357        1.546
4    3300078357        1.546
5    3300078357        1.546
6    3300078357        1.546
write.csv(subset_idmois, "genome_soilMoisture.csv", row.names = FALSE)
library(ggplot2)

ggplot(subset_idmois, aes(x = img_genome_id, y = soilMoisture)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(x = "Genome ID", y = "Soil Moisture") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(subset_idmois, aes(x = img_genome_id, y = soilMoisture)) +
  geom_point(color = "blue", size = 3) +
  labs(x = "Genome ID", y = "Soil Moisture") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Soil Temperature

df <- read.csv("NEON_soilMAGs_soilChem.csv") 
subset_idtemp <- df[, c("img_genome_id", "soilTemp")]

head(subset_idtemp)
  img_genome_id soilTemp
1    3300078357      8.1
2    3300078357      8.1
3    3300078357      8.1
4    3300078357      8.1
5    3300078357      8.1
6    3300078357      8.1
write.csv(subset_idtemp, "genome_soilTemp.csv", row.names = FALSE)
library(ggplot2)

ggplot(subset_idtemp, aes(x = img_genome_id, y = soilTemp)) +
  geom_point(color = "darkred", size = 3) +
  labs(x = "Genome ID", y = "Soil Temperature") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Biogeography and spatial patterns

Mapping phylum distributions

shows the distribution of the phylum of the samples collected

df <- read.csv("NEON_soilMAGs_soilChem.csv") 
location_table <- df[, c("phylum", "decimalLatitude", "decimalLongitude")]

head(location_table)
           phylum decimalLatitude decimalLongitude
1 Acidobacteriota        65.17445        -147.4782
2 Acidobacteriota        65.17445        -147.4782
3     Myxococcota        65.17445        -147.4782
4  Pseudomonadota        65.17445        -147.4782
5 Acidobacteriota        65.17445        -147.4782
6  Pseudomonadota        65.17445        -147.4782
#install.packages(c("ggplot2", "maps", "mapdata"))

library(ggplot2)
library(maps)
library(mapdata)
us_map <- map_data("state")
df <- read.csv("NEON_soilMAGs_soilChem.csv") 
mags_clean <- df %>%
  filter(decimalLongitude >= -125, decimalLongitude <= -66,
         decimalLatitude >= 25, decimalLatitude <= 50)

ggplot() +
  geom_polygon(data = us_map,
               aes(x = long, y = lat, group = group),
               fill = "gray90", color = "black") +
  geom_point(data = mags_clean,
             aes(x = decimalLongitude, y = decimalLatitude, color = phylum),
             size = 10, alpha = 0.7) +
  coord_quickmap() +
  theme_minimal() +
  labs(title = "MAGs Found Across the United States",
       x = "Longitude", y = "Latitude",
       color = "Phylum")

  theme_minimal() +
  theme(
    axis.title.x = element_text(size = 150),  
    axis.title.y = element_text(size = 150), 
    axis.text.x  = element_text(size = 100),  
    axis.text.y  = element_text(size = 100)   
  )
List of 136
 $ line                            :List of 6
  ..$ colour       : chr "black"
  ..$ linewidth    : num 0.5
  ..$ linetype     : num 1
  ..$ lineend      : chr "butt"
  ..$ arrow        : logi FALSE
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_line" "element"
 $ rect                            :List of 5
  ..$ fill         : chr "white"
  ..$ colour       : chr "black"
  ..$ linewidth    : num 0.5
  ..$ linetype     : num 1
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_rect" "element"
 $ text                            :List of 11
  ..$ family       : chr ""
  ..$ face         : chr "plain"
  ..$ colour       : chr "black"
  ..$ size         : num 11
  ..$ hjust        : num 0.5
  ..$ vjust        : num 0.5
  ..$ angle        : num 0
  ..$ lineheight   : num 0.9
  ..$ margin       : 'margin' num [1:4] 0points 0points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : logi FALSE
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ title                           : NULL
 $ aspect.ratio                    : NULL
 $ axis.title                      : NULL
 $ axis.title.x                    :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : num 150
  ..$ hjust        : NULL
  ..$ vjust        : num 1
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 2.75points 0points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi FALSE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.title.x.top                :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : num 0
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 0points 2.75points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.title.x.bottom             : NULL
 $ axis.title.y                    :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : num 150
  ..$ hjust        : NULL
  ..$ vjust        : num 1
  ..$ angle        : num 90
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 2.75points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi FALSE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.title.y.left               : NULL
 $ axis.title.y.right              :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : num 1
  ..$ angle        : num -90
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 0points 0points 2.75points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text                       :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : chr "grey30"
  ..$ size         : 'rel' num 0.8
  ..$ hjust        : NULL
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text.x                     :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : num 100
  ..$ hjust        : NULL
  ..$ vjust        : num 1
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 2.2points 0points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi FALSE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text.x.top                 :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : num 0
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 0points 2.2points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text.x.bottom              : NULL
 $ axis.text.y                     :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : num 100
  ..$ hjust        : num 1
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 2.2points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi FALSE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text.y.left                : NULL
 $ axis.text.y.right               :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : num 0
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 0points 0points 2.2points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text.theta                 : NULL
 $ axis.text.r                     :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : num 0.5
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 2.2points 0points 2.2points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.ticks                      : list()
  ..- attr(*, "class")= chr [1:2] "element_blank" "element"
 $ axis.ticks.x                    : NULL
 $ axis.ticks.x.top                : NULL
 $ axis.ticks.x.bottom             : NULL
 $ axis.ticks.y                    : NULL
 $ axis.ticks.y.left               : NULL
 $ axis.ticks.y.right              : NULL
 $ axis.ticks.theta                : NULL
 $ axis.ticks.r                    : NULL
 $ axis.minor.ticks.x.top          : NULL
 $ axis.minor.ticks.x.bottom       : NULL
 $ axis.minor.ticks.y.left         : NULL
 $ axis.minor.ticks.y.right        : NULL
 $ axis.minor.ticks.theta          : NULL
 $ axis.minor.ticks.r              : NULL
 $ axis.ticks.length               : 'simpleUnit' num 2.75points
  ..- attr(*, "unit")= int 8
 $ axis.ticks.length.x             : NULL
 $ axis.ticks.length.x.top         : NULL
 $ axis.ticks.length.x.bottom      : NULL
 $ axis.ticks.length.y             : NULL
 $ axis.ticks.length.y.left        : NULL
 $ axis.ticks.length.y.right       : NULL
 $ axis.ticks.length.theta         : NULL
 $ axis.ticks.length.r             : NULL
 $ axis.minor.ticks.length         : 'rel' num 0.75
 $ axis.minor.ticks.length.x       : NULL
 $ axis.minor.ticks.length.x.top   : NULL
 $ axis.minor.ticks.length.x.bottom: NULL
 $ axis.minor.ticks.length.y       : NULL
 $ axis.minor.ticks.length.y.left  : NULL
 $ axis.minor.ticks.length.y.right : NULL
 $ axis.minor.ticks.length.theta   : NULL
 $ axis.minor.ticks.length.r       : NULL
 $ axis.line                       : list()
  ..- attr(*, "class")= chr [1:2] "element_blank" "element"
 $ axis.line.x                     : NULL
 $ axis.line.x.top                 : NULL
 $ axis.line.x.bottom              : NULL
 $ axis.line.y                     : NULL
 $ axis.line.y.left                : NULL
 $ axis.line.y.right               : NULL
 $ axis.line.theta                 : NULL
 $ axis.line.r                     : NULL
 $ legend.background               : list()
  ..- attr(*, "class")= chr [1:2] "element_blank" "element"
 $ legend.margin                   : 'margin' num [1:4] 5.5points 5.5points 5.5points 5.5points
  ..- attr(*, "unit")= int 8
 $ legend.spacing                  : 'simpleUnit' num 11points
  ..- attr(*, "unit")= int 8
 $ legend.spacing.x                : NULL
 $ legend.spacing.y                : NULL
 $ legend.key                      : list()
  ..- attr(*, "class")= chr [1:2] "element_blank" "element"
 $ legend.key.size                 : 'simpleUnit' num 1.2lines
  ..- attr(*, "unit")= int 3
 $ legend.key.height               : NULL
 $ legend.key.width                : NULL
 $ legend.key.spacing              : 'simpleUnit' num 5.5points
  ..- attr(*, "unit")= int 8
 $ legend.key.spacing.x            : NULL
 $ legend.key.spacing.y            : NULL
 $ legend.frame                    : NULL
 $ legend.ticks                    : NULL
 $ legend.ticks.length             : 'rel' num 0.2
 $ legend.axis.line                : NULL
 $ legend.text                     :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : 'rel' num 0.8
  ..$ hjust        : NULL
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ legend.text.position            : NULL
 $ legend.title                    :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : num 0
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ legend.title.position           : NULL
 $ legend.position                 : chr "right"
 $ legend.position.inside          : NULL
 $ legend.direction                : NULL
 $ legend.byrow                    : NULL
 $ legend.justification            : chr "center"
 $ legend.justification.top        : NULL
 $ legend.justification.bottom     : NULL
 $ legend.justification.left       : NULL
 $ legend.justification.right      : NULL
 $ legend.justification.inside     : NULL
 $ legend.location                 : NULL
 $ legend.box                      : NULL
 $ legend.box.just                 : NULL
 $ legend.box.margin               : 'margin' num [1:4] 0cm 0cm 0cm 0cm
  ..- attr(*, "unit")= int 1
 $ legend.box.background           : list()
  ..- attr(*, "class")= chr [1:2] "element_blank" "element"
 $ legend.box.spacing              : 'simpleUnit' num 11points
  ..- attr(*, "unit")= int 8
  [list output truncated]
 - attr(*, "class")= chr [1:2] "theme" "gg"
 - attr(*, "complete")= logi TRUE
 - attr(*, "validate")= logi TRUE

Spatial autocorrelation

shows the spatial correlation of genus richness (if sites with more genus richness are closer to other sites with genus richness) a positive slope indicates that high richness sites occur near each other.

library(dplyr)

df_unique <- df %>%
  distinct(decimalLatitude, decimalLongitude, .keep_all = TRUE)
library(dplyr)
library(sf)
library(spdep)

df_unique <- df %>%
  distinct(decimalLatitude, decimalLongitude, .keep_all = TRUE)


df_sites <- df %>%
  group_by(decimalLatitude, decimalLongitude) %>%
  summarise(
    genus_richness  = n_distinct(genus),
    family_richness = n_distinct(family),
    order_richness  = n_distinct(order),
    .groups = "drop"
  )

df_sf <- st_as_sf(df_sites, coords = c("decimalLongitude", "decimalLatitude"), crs = 4326)

df_sf <- st_transform(df_sf, 3857)

coords <- st_coordinates(df_sf)
knn_nb <- knn2nb(knearneigh(coords, k = 5))  

lw <- nb2listw(knn_nb, style = "W")

moran.test(df_sf$genus_richness, lw)

    Moran I test under randomisation

data:  df_sf$genus_richness  
weights: lw    

Moran I statistic standard deviate = 28.4, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
      1.000000000      -0.003731343       0.001249102 
moran.test(df_sf$family_richness, lw)

    Moran I test under randomisation

data:  df_sf$family_richness  
weights: lw    

Moran I statistic standard deviate = 28.358, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
      1.000000000      -0.003731343       0.001252803 
moran.test(df_sf$order_richness, lw)

    Moran I test under randomisation

data:  df_sf$order_richness  
weights: lw    

Moran I statistic standard deviate = 28.346, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
      1.000000000      -0.003731343       0.001253881 
library(spdep)

moran.plot(df_sf$genus_richness, lw,
           main = "Moran Scatterplot: Genus Richness",
           xlab = "Genus Richness",
           ylab = "Spatial Lag of Genus Richness")

Community composition and diversity

Taxonomic profiling

Shows us the phylum composition of the samples of each site

library(dplyr)

df_rel_abund <- df %>%
  group_by(siteID, phylum) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(siteID) %>%
  mutate(rel_abund = count / sum(count))
library(ggplot2)

ggplot(df_rel_abund, aes(x = siteID, y = rel_abund, fill = phylum)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Taxonomic Profile by Site",
    x = "Site ID",
    y = "Relative Abundance",
    fill = "Phylum"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "right"
  )

Co-occurrence and network analysis

Taxonomic co-occurrence heat map

Shows which genera are more likely to occur close to each other

library(dplyr)
library(tidyr)
library(ggplot2)
library(pheatmap)
df <- read.csv("NEON_soilMAGs_soilChem.csv") 

# Step 1: Create a presence/absence matrix of taxa by sample
taxa_matrix <- df %>%
  select(genomicsSampleID, genus) %>%
  mutate(present = 1) %>%
  distinct() %>%
  pivot_wider(names_from = genus, values_from = present, values_fill = 0)

# Step 2: Compute co-occurrence (pairwise correlations between genera)
# Remove sample ID column
taxa_only <- taxa_matrix %>% select(-genomicsSampleID)

# Remove genera with no variation across samples
taxa_filtered <- taxa_only[, apply(taxa_only, 2, sd) != 0]

# Recompute correlation
co_occurrence <- cor(taxa_filtered, method = "spearman")

# Replace NA/NaN/Inf with 0 or remove problematic rows/columns
co_occurrence[!is.finite(co_occurrence)] <- 0


# Step 3: Plot heatmap
pheatmap(co_occurrence,
         clustering_distance_rows = "euclidean",
         clustering_distance_cols = "euclidean",
         clustering_method = "ward.D2",
         main = "Genus Co-occurrence Heatmap",
         color = colorRampPalette(c("blue", "white", "red"))(50))